import os

import requests
from bs4 import BeautifulSoup

if __name__ == "__main__":
    # Spoof a desktop-browser User-Agent so the site serves the normal page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 Edg/97.0.1072.62"
    }
    base_url = "http://www.shicimingju.com"

    # Fetch the novel's table-of-contents page.
    response = requests.get(base_url + "/book/sanguoyanyi.html", headers=headers)
    response.raise_for_status()  # fail fast on HTTP errors instead of parsing an error page
    # Force UTF-8 decoding; the server's declared charset can be wrong for this site.
    # (Note: gzip transfer-decompression is handled transparently by requests —
    # it is unrelated to this line.)
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "lxml")

    # Ensure the output directory exists; open() does not create directories.
    os.makedirs("./file", exist_ok=True)

    # `with` guarantees the file is closed even if a request/parse fails mid-loop
    # (the original left the handle open for the process lifetime).
    with open("./file/sanguo.txt", "w", encoding="utf-8") as fp:
        # Each <a> under .book-mulu links to one chapter page via a relative href.
        for a in soup.select(".book-mulu > ul > li > a"):
            title = a.string
            detail_response = requests.get(base_url + a["href"], headers=headers)
            detail_response.raise_for_status()
            detail_response.encoding = "utf-8"
            detail_soup = BeautifulSoup(detail_response.text, "lxml")
            # select_one returns the first matching element (the chapter body div).
            content = detail_soup.select_one(".chapter_content").text
            fp.write(title + ':' + content + '\n')
            print(title, '爬取成功！！！')
